library(data.table)
library(dplyr)
library(tidyr)
library(readxl)
library(biomaRt)
library(forcats)
library(qqman)
library(lmtest)
library(kableExtra)
library(Biobase)

Results

For the results I show both the hard data for each model as well as a quantile-quantile plot and manhattan plot. For the latter I set the significance equal to the standard 0.05 and divided by the number of variants (16). This is then plotted as the -log10 of the P-Value for each SNP and shown on the Y-axis. This is the level at which a SNP becomes significant in our analysis.

qq(results$P, cex=1, main="QQ-Plot")

manhattan(results,chr="CHR",bp="BP",p="P",snp="SNP",col=c("orange","purple"),chrlabs=NULL,annotatePval=NULL,annotateTop=TRUE, ylim=c(0,4), suggestiveline=-log10(0.05/16), main="Manhattan Plot Normal Covariates")

As none of the SNPs show significance we move on to a systematic removal of covariates in order to see if this changes the significance of our results.

Minus Sex

Fisrt we look at the model without the Sex covariate only.

MinusSex <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/MinusAge.PHENO1.glm.logistic", 
                  sep="\t", header=T)
MinusSex
##     #CHROM      POS         ID REF ALT A1 TEST OBS_CT       OR LOG(OR)_SE
##  1:      2 85666618  rs1130866   T   C  C  ADD    254 0.948712   0.205275
##  2:      2 85667185  rs3024798   C   A  A  ADD    128 0.576672   0.302266
##  3:      2 85668215  rs2077079   A   C  C  ADD    128 0.795599   0.278560
##  4:      8 22163524     rs4715   C   A  A  ADD    255 1.068810   0.230825
##  5:      8 22164004     rs1124   G   A  A  ADD    255 1.115600   0.227948
##  6:     10 79557289  rs1965708   C   A  A  ADD    390 0.827922   0.182307
##  7:     10 79557536  rs1965707   C   T  T  ADD    390 1.018450   0.161078
##  8:     10 79558907 rs17886395   G   C  C  ADD    390 0.757868   0.125969
##  9:     10 79559458  rs1059046   A   C  C  ADD    261       NA         NA
## 10:     10 79611881  rs1059047   T   C  C  ADD    263       NA         NA
## 11:     10 79611973  rs1136450   G   C  C  ADD    263       NA         NA
## 12:     10 79612325  rs1136451   A   G  G  ADD    392 1.243010   0.183416
## 13:     10 79613765  rs1059057   A   G  G  ADD    384 1.057330   0.248981
## 14:     10 79614021  rs4253527   C   T  T  ADD    391 1.391180   0.238670
## 15:     10 79941966  rs2243639   G   A  A  ADD    269       NA         NA
## 16:     10 79946568   rs721917   T   C  C  ADD    400 1.237880   0.151189
##        Z_STAT         P
##  1: -0.256484 0.7975770
##  2: -1.821190 0.0685784
##  3: -0.820862 0.4117250
##  4:  0.288282 0.7731310
##  5:  0.479917 0.6312870
##  6: -1.035820 0.3002880
##  7:  0.113501 0.9096340
##  8: -2.200900 0.0277429
##  9:        NA        NA
## 10:        NA        NA
## 11:        NA        NA
## 12:  1.186000 0.2356210
## 13:  0.223902 0.8228340
## 14:  1.383290 0.1665760
## 15:        NA        NA
## 16:  1.411490 0.1580990
qq(MinusSex$P, cex=1, main="QQ-Plot")

manhattan(MinusSex,chr="#CHROM",bp="POS",p="P",snp="SNP",col=c("orange","purple"),chrlabs=NULL,annotatePval=NULL,annotateTop=TRUE, ylim=c(0,4), suggestiveline=-log10(0.05/16), main="Manhattan Plot Age and Ancestry")
## Warning in manhattan(MinusSex, chr = "#CHROM", bp = "POS", p = "P", snp =
## "SNP", : No SNP column found. OK unless you're trying to highlight.

Minus Age

Next we look at the model without the Age covariate only.

MinusAge <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/MinusSex.PHENO1.glm.logistic", 
      sep="\t", header=T)
MinusAge
##     #CHROM      POS         ID REF ALT A1 TEST OBS_CT       OR LOG(OR)_SE
##  1:      2 85666618  rs1130866   T   C  C  ADD    254 0.935605   0.205699
##  2:      2 85667185  rs3024798   C   A  A  ADD    128 0.578317   0.302508
##  3:      2 85668215  rs2077079   A   C  C  ADD    128 0.792010   0.278736
##  4:      8 22163524     rs4715   C   A  A  ADD    255 1.085560   0.230701
##  5:      8 22164004     rs1124   G   A  A  ADD    255 1.148210   0.228619
##  6:     10 79557289  rs1965708   C   A  A  ADD    390 0.831288   0.182037
##  7:     10 79557536  rs1965707   C   T  T  ADD    390 1.018560   0.162034
##  8:     10 79558907 rs17886395   G   C  C  ADD    390 0.755126   0.126119
##  9:     10 79559458  rs1059046   A   C  C  ADD    261       NA         NA
## 10:     10 79611881  rs1059047   T   C  C  ADD    263       NA         NA
## 11:     10 79611973  rs1136450   G   C  C  ADD    263       NA         NA
## 12:     10 79612325  rs1136451   A   G  G  ADD    392 1.242940   0.183421
## 13:     10 79613765  rs1059057   A   G  G  ADD    384 1.059340   0.249145
## 14:     10 79614021  rs4253527   C   T  T  ADD    391 1.389000   0.239199
## 15:     10 79941966  rs2243639   G   A  A  ADD    269       NA         NA
## 16:     10 79946568   rs721917   T   C  C  ADD    400 1.234390   0.151381
##        Z_STAT         P
##  1: -0.323591 0.7462480
##  2: -1.810310 0.0702473
##  3: -0.836564 0.4028370
##  4:  0.355867 0.7219400
##  5:  0.604503 0.5455090
##  6: -1.015060 0.3100760
##  7:  0.113477 0.9096520
##  8: -2.227030 0.0259453
##  9:        NA        NA
## 10:        NA        NA
## 11:        NA        NA
## 12:  1.185700 0.2357410
## 13:  0.231362 0.8170340
## 14:  1.373700 0.1695360
## 15:        NA        NA
## 16:  1.391030 0.1642160
qq(MinusAge$P, cex=1, main="QQ-Plot")

manhattan(MinusAge,chr="#CHROM",bp="POS",p="P",snp="SNP",col=c("orange","purple"),chrlabs=NULL,annotatePval=NULL,annotateTop=TRUE, ylim=c(0,4), suggestiveline=-log10(0.05/16), main="Manhattan Plot Sex and Ancestry")
## Warning in manhattan(MinusAge, chr = "#CHROM", bp = "POS", p = "P", snp =
## "SNP", : No SNP column found. OK unless you're trying to highlight.

Minus Sex & Age

Then we look at the model without both Sex and Age.

MinusSexandAge <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/MinusSexandAge.PHENO1.glm.logistic", 
      sep="\t", header=T)
MinusSexandAge
##     #CHROM      POS         ID REF ALT A1 TEST OBS_CT       OR LOG(OR)_SE
##  1:      2 85666618  rs1130866   T   C  C  ADD    254 0.941023   0.204836
##  2:      2 85667185  rs3024798   C   A  A  ADD    128 0.573536   0.301799
##  3:      2 85668215  rs2077079   A   C  C  ADD    128 0.791666   0.278062
##  4:      8 22163524     rs4715   C   A  A  ADD    255 1.078470   0.230226
##  5:      8 22164004     rs1124   G   A  A  ADD    255 1.125570   0.226898
##  6:     10 79557289  rs1965708   C   A  A  ADD    390 0.833586   0.181736
##  7:     10 79557536  rs1965707   C   T  T  ADD    390 1.022750   0.160758
##  8:     10 79558907 rs17886395   G   C  C  ADD    390 0.756856   0.125902
##  9:     10 79559458  rs1059046   A   C  C  ADD    261       NA         NA
## 10:     10 79611881  rs1059047   T   C  C  ADD    263       NA         NA
## 11:     10 79611973  rs1136450   G   C  C  ADD    263       NA         NA
## 12:     10 79612325  rs1136451   A   G  G  ADD    392 1.243130   0.183393
## 13:     10 79613765  rs1059057   A   G  G  ADD    384 1.057700   0.248975
## 14:     10 79614021  rs4253527   C   T  T  ADD    391 1.390620   0.238596
## 15:     10 79941966  rs2243639   G   A  A  ADD    269       NA         NA
## 16:     10 79946568   rs721917   T   C  C  ADD    400 1.237710   0.151152
##        Z_STAT         P
##  1: -0.296761 0.7666490
##  2: -1.842070 0.0654645
##  3: -0.840156 0.4008210
##  4:  0.328142 0.7428040
##  5:  0.521347 0.6021250
##  6: -1.001550 0.3165610
##  7:  0.139934 0.8887120
##  8: -2.212690 0.0269188
##  9:        NA        NA
## 10:        NA        NA
## 11:        NA        NA
## 12:  1.186680 0.2353550
## 13:  0.225297 0.8217480
## 14:  1.382040 0.1669590
## 15:        NA        NA
## 16:  1.410890 0.1582770
qq(MinusSexandAge$P, cex=1, main="QQ-Plot")

manhattan(MinusSexandAge,chr="#CHROM",bp="POS",p="P",snp="SNP",col=c("orange","purple"),chrlabs=NULL,annotatePval=NULL,annotateTop=TRUE, ylim=c(0,4), suggestiveline=-log10(0.05/16), main="Manhattan Plot Ancestry Only")
## Warning in manhattan(MinusSexandAge, chr = "#CHROM", bp = "POS", p = "P", : No
## SNP column found. OK unless you're trying to highlight.

No Covariates

Finally we remove all covariates from the model.

NoCov <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/NoCov.PHENO1.glm.logistic", 
               sep="\t", header=T)
NoCov
##     #CHROM      POS         ID REF ALT A1 TEST OBS_CT       OR LOG(OR)_SE
##  1:      2 85666618  rs1130866   T   C  C  ADD    254 0.947127   0.199107
##  2:      2 85667185  rs3024798   C   A  A  ADD    128 0.540041   0.292559
##  3:      2 85668215  rs2077079   A   C  C  ADD    128 0.727546   0.270009
##  4:      8 22163524     rs4715   C   A  A  ADD    255 1.033200   0.225067
##  5:      8 22164004     rs1124   G   A  A  ADD    255 1.041420   0.218707
##  6:     10 79557289  rs1965708   C   A  A  ADD    391 0.874637   0.178413
##  7:     10 79557536  rs1965707   C   T  T  ADD    391 1.047800   0.158849
##  8:     10 79558907 rs17886395   G   C  C  ADD    391 0.762683   0.124702
##  9:     10 79559458  rs1059046   A   C  C  ADD    262 0.935887   0.187012
## 10:     10 79611881  rs1059047   T   C  C  ADD    264 1.329150   0.319219
## 11:     10 79611973  rs1136450   G   C  C  ADD    264 0.900438   0.181578
## 12:     10 79612325  rs1136451   A   G  G  ADD    393 1.262640   0.180970
## 13:     10 79613765  rs1059057   A   G  G  ADD    385 1.075390   0.246670
## 14:     10 79614021  rs4253527   C   T  T  ADD    392 1.395450   0.236007
## 15:     10 79941966  rs2243639   G   A  A  ADD    270 0.654964   0.190580
## 16:     10 79946568   rs721917   T   C  C  ADD    401 1.260120   0.149840
##        Z_STAT         P
##  1: -0.272830 0.7849840
##  2: -2.105930 0.0352101
##  3: -1.178030 0.2387860
##  4:  0.145123 0.8846140
##  5:  0.185548 0.8527990
##  6: -0.750767 0.4527930
##  7:  0.293919 0.7688200
##  8: -2.172490 0.0298191
##  9: -0.354310 0.7231070
## 10:  0.891361 0.3727350
## 11: -0.577569 0.5635550
## 12:  1.288640 0.1975240
## 13:  0.294649 0.7682620
## 14:  1.411880 0.1579850
## 15: -2.220460 0.0263879
## 16:  1.543050 0.1228190
qq(NoCov$P, cex=1, main="QQ-Plot")

manhattan(NoCov,chr="#CHROM",bp="POS",p="P",snp="SNP",col=c("orange","purple"),chrlabs=NULL,annotatePval=NULL,annotateTop=TRUE, ylim=c(0,4), suggestiveline=-log10(0.05/16), main="Manhattan Plot No Covariates")
## Warning in manhattan(NoCov, chr = "#CHROM", bp = "POS", p = "P", snp = "SNP", :
## No SNP column found. OK unless you're trying to highlight.

As the models continue to show a lack of significance in terms of the SNPs I then moved on to including an interaction term(s) in the models.

Sex*Age Interaction w/ All Other Covariates

First I tried a basic Sex*Age interaction which is commonly used in GWAS/Meta-Analysis.

SexAgeInt <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/Interaction.PHENO1.glm.logistic", 
               sep="\t", header=T)
SexAgeInt
##     #CHROM      POS         ID REF ALT A1 TEST OBS_CT       OR LOG(OR)_SE
##  1:      2 85666618  rs1130866   T   C  C  ADD    254 0.938388   0.206162
##  2:      2 85667185  rs3024798   C   A  A  ADD    128 0.575510   0.304426
##  3:      2 85668215  rs2077079   A   C  C  ADD    128 0.786036   0.281829
##  4:      8 22163524     rs4715   C   A  A  ADD    255 1.071150   0.232011
##  5:      8 22164004     rs1124   G   A  A  ADD    255 1.139410   0.229819
##  6:     10 79557289  rs1965708   C   A  A  ADD    390 0.825296   0.182629
##  7:     10 79557536  rs1965707   C   T  T  ADD    390 1.014670   0.162358
##  8:     10 79558907 rs17886395   G   C  C  ADD    390 0.755542   0.126237
##  9:     10 79559458  rs1059046   A   C  C  ADD    261       NA         NA
## 10:     10 79611881  rs1059047   T   C  C  ADD    263       NA         NA
## 11:     10 79611973  rs1136450   G   C  C  ADD    263       NA         NA
## 12:     10 79612325  rs1136451   A   G  G  ADD    392 1.243180   0.183408
## 13:     10 79613765  rs1059057   A   G  G  ADD    384 1.057800   0.249148
## 14:     10 79614021  rs4253527   C   T  T  ADD    391 1.391460   0.239253
## 15:     10 79941966  rs2243639   G   A  A  ADD    269       NA         NA
## 16:     10 79946568   rs721917   T   C  C  ADD    400 1.236180   0.151476
##         Z_STAT         P
##  1: -0.3084540 0.7577370
##  2: -1.8148900 0.0695407
##  3: -0.8542480 0.3929680
##  4:  0.2962450 0.7670430
##  5:  0.5678660 0.5701260
##  6: -1.0513900 0.2930810
##  7:  0.0897176 0.9285120
##  8: -2.2205800 0.0263791
##  9:         NA        NA
## 10:         NA        NA
## 11:         NA        NA
## 12:  1.1868100 0.2353040
## 13:  0.2255210 0.8215740
## 14:  1.3807600 0.1673530
## 15:         NA        NA
## 16:  1.3997400 0.1615910
qq(SexAgeInt$P, cex=1, main="QQ-Plot")

manhattan(SexAgeInt,chr="#CHROM",bp="POS",p="P",snp="SNP",col=c("orange","purple"),chrlabs=NULL,annotatePval=NULL,annotateTop=TRUE, ylim=c(0,4), suggestiveline=-log10(0.05/16), main="Manhattan Plot Sex*Age Interaction")
## Warning in manhattan(SexAgeInt, chr = "#CHROM", bp = "POS", p = "P", snp =
## "SNP", : No SNP column found. OK unless you're trying to highlight.

Next we build a Multivariate-Model which contains all of the multi-allelic variants at once

MultiAllelicLM <- glm(Outcome ~ ., data=MultiAllelic, family=binomial)
summary(MultiAllelicLM)
## 
## Call:
## glm(formula = Outcome ~ ., family = binomial, data = MultiAllelic)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4333  -0.9254   0.3008   0.8242   2.2002  
## 
## Coefficients: (3 not defined because of singularities)
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.098e-01  4.280e-01  -0.724  0.46913    
## Sex               -9.882e-03  2.531e-01  -0.039  0.96886    
## Age               -1.217e-01  2.932e-02  -4.149 3.34e-05 ***
## Asian              1.897e+00  2.289e+00   0.829  0.40737    
## Black              7.185e-01  4.829e-01   1.488  0.13676    
## Hispanic          -8.534e-03  6.679e-01  -0.013  0.98981    
## Mix                       NA         NA      NA       NA    
## Other             -8.636e-01  7.991e-01  -1.081  0.27980    
## Smoking            1.137e-01  4.288e-01   0.265  0.79088    
## Pet                1.115e+00  4.562e-01   2.445  0.01450 *  
## Infection          1.380e+00  2.789e-01   4.949 7.46e-07 ***
## V1A1A              1.833e+01  2.752e+03   0.007  0.99469    
## V1A01A3            1.971e+00  9.608e-01   2.052  0.04019 *  
## V1A1A5            -1.291e+01  3.956e+03  -0.003  0.99740    
## V1A01A9            1.887e+01  1.971e+03   0.010  0.99236    
## V1A01A0            1.601e+00  6.099e-01   2.626  0.00864 ** 
## V1A1A0             4.501e-01  8.869e-01   0.508  0.61179    
## V1A1A1             1.492e-01  9.451e-01   0.158  0.87454    
## V1A01A2            7.593e-01  7.344e-01   1.034  0.30122    
## V1A01A5            8.683e-01  1.147e+00   0.757  0.44918    
## V1A11A1            1.424e+00  1.276e+00   1.116  0.26459    
## V1A31A5            1.665e+01  2.384e+03   0.007  0.99443    
## V1A11A2            1.582e+00  1.297e+00   1.221  0.22227    
## V1A1A2             1.552e+00  1.337e+00   1.161  0.24545    
## V1A11A3            1.667e-01  1.326e+00   0.126  0.89994    
## V1A11A5            1.321e+00  1.508e+00   0.876  0.38114    
## V1A01A8            1.524e+01  3.956e+03   0.004  0.99693    
## V1A1A3             1.563e+00  1.349e+00   1.158  0.24686    
## V1A21A2            1.777e+01  3.956e+03   0.004  0.99642    
## V1A21A5            1.752e+00  1.539e+00   1.138  0.25499    
## V1A51A10          -9.203e-01  2.257e+00  -0.408  0.68351    
## V1A51A5            1.689e+01  3.956e+03   0.004  0.99659    
## V1A11A10           1.659e+01  3.956e+03   0.004  0.99665    
## V1A01A12          -1.406e+01  3.956e+03  -0.004  0.99716    
## VNE2               1.756e+01  3.956e+03   0.004  0.99646    
## VNE1               2.232e+00  7.399e-01   3.017  0.00256 ** 
## VNE3              -1.798e+01  3.956e+03  -0.005  0.99637    
## VNE10             -1.839e+01  3.956e+03  -0.005  0.99629    
## V6A26A3           -5.579e-01  4.472e-01  -1.247  0.21226    
## V6A6A4             1.640e+01  3.956e+03   0.004  0.99669    
## V6A6A                     NA         NA      NA       NA    
## V6A26A2           -8.860e-01  6.336e-01  -1.398  0.16204    
## V6A6A2             5.402e-01  1.011e+00   0.534  0.59327    
## V6A46A4            2.087e-01  5.595e+03   0.000  0.99997    
## V6A6A3                    NA         NA      NA       NA    
## V6A26A4            4.417e-01  9.388e-01   0.470  0.63803    
## V6A36A3           -8.882e-01  1.019e+00  -0.872  0.38335    
## V6A26A7V6A36A6     1.617e+01  2.796e+03   0.006  0.99539    
## V6A36A4           -1.319e+00  1.127e+00  -1.170  0.24211    
## V6A126A13V6A26A18  1.557e+01  3.956e+03   0.004  0.99686    
## V6A26A8           -1.636e+00  1.691e+00  -0.967  0.33351    
## V6A36A14          -3.369e+01  5.595e+03  -0.006  0.99520    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 548.30  on 401  degrees of freedom
## Residual deviance: 408.99  on 353  degrees of freedom
## AIC: 506.99
## 
## Number of Fisher Scoring iterations: 16
MAodds <- exp(MultiAllelicLM$coefficients)
MALMData <- as.data.frame(cbind("Name"=matrix(names(MultiAllelicLM$coefficients), ncol=1)[-c(7,41,45)],
                                "OR"=na.omit(unname(MAodds)),
                                "P"=matrix(coef(summary(MultiAllelicLM))[,4], ncol=1)))
colnames(MALMData)[colnames(MALMData) == "V2"] <- "Beta"
colnames(MALMData)[colnames(MALMData) == "V3"] <- "P"
MALMData
##                 Name                   OR                    P
## 1        (Intercept)    0.733574727167779    0.469131924284838
## 2                Sex    0.990166655219187    0.968860548612354
## 3                Age    0.885441312809662 3.33815041443129e-05
## 4              Asian     6.66523537720427    0.407372978942602
## 5              Black     2.05144300430373    0.136761783878859
## 6           Hispanic    0.991502291072766    0.989805473441694
## 7              Other    0.421630599951538    0.279796493066583
## 8            Smoking     1.12042468238592    0.790884581779894
## 9                Pet     3.04995001449746   0.0144996719377657
## 10         Infection     3.97650617719077 7.46031894325567e-07
## 11             V1A1A     91044965.3903043    0.994685835521043
## 12           V1A01A3     7.18014164763176   0.0401861699941062
## 13            V1A1A5 2.48340028330359e-06    0.997397139928527
## 14           V1A01A9     156669638.577843      0.9923627566009
## 15           V1A01A0     4.96005504027756  0.00864199925188129
## 16            V1A1A0     1.56847368225219    0.611792248930491
## 17            V1A1A1     1.16093842602393    0.874540077257852
## 18           V1A01A2     2.13671339073284    0.301216164952109
## 19           V1A01A5     2.38284407507275    0.449179689452193
## 20           V1A11A1     4.15214040367493    0.264585252813042
## 21           V1A31A5     16997396.5369509    0.994427821168342
## 22           V1A11A2     4.86656505120099    0.222273743165493
## 23            V1A1A2     4.72278242746876    0.245454701468817
## 24           V1A11A3     1.18142165856906    0.899944288954544
## 25           V1A11A5     3.74730117315884    0.381143988316751
## 26           V1A01A8      4145432.1204602    0.996926896987262
## 27            V1A1A3     4.77150268059078    0.246856633512277
## 28           V1A21A2     51990237.3101743    0.996416841553639
## 29           V1A21A5      5.7641402770072    0.254985593466912
## 30          V1A51A10    0.398405970909073     0.68351451548207
## 31           V1A51A5     21689096.0142399    0.996593158221562
## 32          V1A11A10     15998726.5715601    0.996654529182827
## 33          V1A01A12 7.79940678677827e-07    0.997163561326784
## 34              VNE2     42360533.6106353    0.996458153120629
## 35              VNE1     9.31772160285101  0.00255678868979668
## 36              VNE3 1.56141881974383e-08    0.996374787593315
## 37             VNE10 1.02803546098812e-08    0.996290497046979
## 38           V6A26A3      0.5724372137033    0.212257581809937
## 39            V6A6A4     13310912.4511653    0.996691623088942
## 40           V6A26A2    0.412321888135349     0.16203628705383
## 41            V6A6A2     1.71637361441419    0.593274047726458
## 42           V6A46A4     1.23209677310105     0.99997023487875
## 43           V6A26A4     1.55528998472033    0.638032093725488
## 44           V6A36A3    0.411385235159794    0.383346424584058
## 45    V6A26A7V6A36A6     10485769.2881643    0.995386216510856
## 46           V6A36A4    0.267505012024327    0.242105038624873
## 47 V6A126A13V6A26A18     5792287.21463142    0.996859431366812
## 48           V6A26A8    0.194853670590083    0.333507932175189
## 49          V6A36A14 2.33361437760947e-15    0.995195315503984

From these we results we find significance in varients NE1, 1A0/1A0 & 1A0/1A3 with additional significance in covariates Age, Pet and Infection. Specifically if we look at the variants, NE1 is considered risky, 1A0/1A0 is risky and 1A0/1A3 is also risky based off their calculated odds ratios as each of their beta estimates are > 1.

AgebyOutcome <- data.table(cbind(MultiAllelic$Age, MultiAllelic$Outcome))
Deaths <- AgebyOutcome %>% filter( V2 == "1")
Lived <- AgebyOutcome %>% filter( V2 == "0")
hist(Lived$V1, main="Histogram of Patients Who Lived", xlab=paste0("Age (Average: ",mean(Lived$V1),")"))
abline(v=mean(Lived$V1), col="red")

hist(Deaths$V1, main="Histogram of Patients Who Died", xlab=paste0("Age (Average: ",mean(Deaths$V1),")"))
abline(v=mean(Deaths$V1), col="red")

a <- c()
Varients <- colnames(MultiAllelic[12:52])
for (i in colnames(MultiAllelic[12:52])){
  x <- paste0("Outcome ~ ", i, " + Sex + Age + Asian + Black + Hispanic + Mix + Other + Smoking + Pet + Infection")
  individualmodel <- glm(as.formula(x), data=MultiAllelic, family=binomial)
  a <- c(a, coef(summary(individualmodel))[2,4])
}
IndMod <- as.data.frame(cbind("Varients"=Varients, "PValues"=a))
IndMod  %>% arrange(PValues)
##             Varients            PValues
## 1             V6A6A4  0.058197709196178
## 2               VNE1 0.0669266891379583
## 3            V6A26A4 0.0678367587102841
## 4             V1A1A5 0.0855380788043218
## 5             V6A6A2  0.217923904652174
## 6             V1A1A2  0.224392669340978
## 7            V6A26A2  0.255590265278709
## 8             V1A1A0  0.256458866664515
## 9            V1A01A0  0.263780039371406
## 10          V1A51A10  0.302922483500094
## 11           V6A26A3  0.304763863984181
## 12            V6A6A3  0.304763863984181
## 13           V1A01A3  0.322612193398784
## 14           V1A11A3  0.420720174653368
## 15           V6A36A4  0.456146240600145
## 16            V1A1A3  0.494839954047621
## 17           V1A01A5  0.564471727861534
## 18          V1A11A10  0.604310699575505
## 19           V6A26A8  0.684752346166993
## 20            V1A1A1  0.719268798623756
## 21           V1A11A5   0.76963343970846
## 22           V1A11A1  0.907849270476197
## 23           V6A36A3  0.968301563216385
## 24              VNE3  0.977781121145797
## 25             VNE10  0.977969223881147
## 26           V1A21A5   0.97865391817507
## 27              VNE2  0.979699059851026
## 28           V6A46A4  0.979859765517212
## 29          V6A36A14   0.97994555111399
## 30             V1A1A  0.980542559508865
## 31             V6A6A  0.980542559508865
## 32           V1A21A2  0.981272692441606
## 33           V1A51A5  0.981704717258427
## 34 V6A126A13V6A26A18  0.982374761087439
## 35           V1A01A9   0.98251770604902
## 36          V1A01A12  0.982717575732278
## 37    V6A26A7V6A36A6  0.983107703836453
## 38           V1A01A8  0.985180654098441
## 39           V1A31A5  0.985765357406388
## 40           V1A11A2  0.989918405389124
## 41           V1A01A2  0.998531733598324

When looking at individual tests however we find that there is no significance in any of the variants.

Confidence Intervals for Full Multivariate-Model

exp(na.omit(confint.default(MultiAllelicLM, level=0.95)))
##                         2.5 %      97.5 %
## (Intercept)       0.317051752   1.6972998
## Sex               0.602883584   1.6262344
## Age               0.835985684   0.9378227
## Asian             0.074988289 592.4306706
## Black             0.796176848   5.2857834
## Hispanic          0.267776059   3.6712647
## Other             0.088055198   2.0188742
## Smoking           0.483467498   2.5965581
## Pet               1.247430156   7.4570869
## Infection         2.301858865   6.8694921
## V1A1A             0.000000000         Inf
## V1A01A3           1.092273694  47.1991904
## V1A1A5            0.000000000         Inf
## V1A01A9           0.000000000         Inf
## V1A01A0           1.500989306  16.3906204
## V1A1A0            0.275784299   8.9204124
## V1A1A1            0.182105806   7.4010712
## V1A01A2           0.506527868   9.0134115
## V1A01A5           0.251460735  22.5798508
## V1A11A1           0.340460755  50.6380535
## V1A31A5           0.000000000         Inf
## V1A11A2           0.383386319  61.7743885
## V1A1A2            0.343945169  64.8495047
## V1A11A3           0.087846321  15.8886236
## V1A11A5           0.194882285  72.0551182
## V1A01A8           0.000000000         Inf
## V1A1A3            0.338860213  67.1876985
## V1A21A2           0.000000000         Inf
## V1A21A5           0.282424449 117.6431900
## V1A51A10          0.004773236  33.2536062
## V1A51A5           0.000000000         Inf
## V1A11A10          0.000000000         Inf
## V1A01A12          0.000000000         Inf
## VNE2              0.000000000         Inf
## VNE1              2.185299444  39.7290797
## VNE3              0.000000000         Inf
## VNE10             0.000000000         Inf
## V6A26A3           0.238261978   1.3753112
## V6A6A4            0.000000000         Inf
## V6A26A2           0.119098222   1.4274717
## V6A6A2            0.236408371  12.4612270
## V6A46A4           0.000000000         Inf
## V6A26A4           0.247001959   9.7931488
## V6A36A3           0.055841530   3.0306801
## V6A26A7V6A36A6    0.000000000         Inf
## V6A36A4           0.029362708   2.4370685
## V6A126A13V6A26A18 0.000000000         Inf
## V6A26A8           0.007082098   5.3611165
## V6A36A14          0.000000000         Inf
## attr(,"na.action")
##    Mix  V6A6A V6A6A3 
##      7     41     45 
## attr(,"class")
## [1] "omit"

For Individual tests we found nothing of significance for multi-allelic variants.

Likelihood Ratio Tests

fullmodel <- glm(Outcome ~ ., data=MultiAllelic, family=binomial)
covmodel <- glm(Outcome ~ Sex + Age + Black + Asian + Hispanic + Other + Mix + Smoking + Pet + Infection, data=MultiAllelic, family=binomial)

lrtest(fullmodel, covmodel)
## Likelihood ratio test
## 
## Model 1: Outcome ~ Sex + Age + Asian + Black + Hispanic + Mix + Other + 
##     Smoking + Pet + Infection + V1A1A + V1A01A3 + V1A1A5 + V1A01A9 + 
##     V1A01A0 + V1A1A0 + V1A1A1 + V1A01A2 + V1A01A5 + V1A11A1 + 
##     V1A31A5 + V1A11A2 + V1A1A2 + V1A11A3 + V1A11A5 + V1A01A8 + 
##     V1A1A3 + V1A21A2 + V1A21A5 + V1A51A10 + V1A51A5 + V1A11A10 + 
##     V1A01A12 + VNE2 + VNE1 + VNE3 + VNE10 + V6A26A3 + V6A6A4 + 
##     V6A6A + V6A26A2 + V6A6A2 + V6A46A4 + V6A6A3 + V6A26A4 + V6A36A3 + 
##     V6A26A7V6A36A6 + V6A36A4 + V6A126A13V6A26A18 + V6A26A8 + 
##     V6A36A14
## Model 2: Outcome ~ Sex + Age + Black + Asian + Hispanic + Other + Mix + 
##     Smoking + Pet + Infection
##   #Df  LogLik  Df  Chisq Pr(>Chisq)  
## 1  49 -204.50                        
## 2  10 -233.22 -39 57.445    0.02865 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

I performed the likelihood ratio test to compare the fit of the two models, i.e. does the data fit each of the two models equally well, and from this we see that with a Chisq of 57.445 and P-Value of 0.02865, that we reject the null hypothesis that the two are equal. Therefore, the model with the additional multi-allelic variants is a better fit and offers significant improvement over the model with only the covarites as our predictors.

MAF

# plink --file RSVData --freq --mind 1 --nonfounders

Freq <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/plink.frq", sep=" ", header=T)
Freq
##     CHR        SNP A1 A2     MAF NCHROBS
##  1:   2  rs1130866  C  T 0.47050     508
##  2:   2  rs3024798  A  C 0.31250     256
##  3:   2  rs2077079  C  A 0.40230     256
##  4:   8     rs4715  A  C 0.25100     510
##  5:   8     rs1124  A  G 0.30980     510
##  6:  10  rs1965708  A  C 0.19820     782
##  7:  10  rs1965707  T  C 0.27370     782
##  8:  10 rs17886395  C  G 0.39900     782
##  9:  10  rs1059046  C  A 0.42370     524
## 10:  10  rs1059047  C  T 0.09848     528
## 11:  10  rs1136450  C  G 0.44700     528
## 12:  10  rs1136451  G  A 0.19080     786
## 13:  10  rs1059057  G  A 0.10000     770
## 14:  10  rs4253527  T  C 0.10080     784
## 15:  10  rs2243639  A  G 0.35370     540
## 16:  10   rs721917  C  T 0.42770     802

Dominant Recessive Run

# plink2 --bfile RSVData --covar RSVCovariates.txt --glm hide-covar genotypic

DomRec <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/DOMDEV.PHENO1.glm.logistic", sep="\t", header=T)
DomRec
##     #CHROM      POS         ID REF ALT A1     TEST OBS_CT       OR LOG(OR)_SE
##  1:      2 85666618  rs1130866   T   C  C      ADD    254       NA         NA
##  2:      2 85666618  rs1130866   T   C  C   DOMDEV    254       NA         NA
##  3:      2 85666618  rs1130866   T   C  C GENO_2DF    254       NA         NA
##  4:      2 85667185  rs3024798   C   A  A      ADD    128 0.544648   0.374128
##  5:      2 85667185  rs3024798   C   A  A   DOMDEV    128 0.662937   0.474499
##  6:      2 85667185  rs3024798   C   A  A GENO_2DF    128       NA         NA
##  7:      2 85668215  rs2077079   A   C  C      ADD    128 0.656310   0.314192
##  8:      2 85668215  rs2077079   A   C  C   DOMDEV    128 0.914694   0.401808
##  9:      2 85668215  rs2077079   A   C  C GENO_2DF    128       NA         NA
## 10:      8 22163524     rs4715   C   A  A      ADD    255       NA         NA
## 11:      8 22163524     rs4715   C   A  A   DOMDEV    255       NA         NA
## 12:      8 22163524     rs4715   C   A  A GENO_2DF    255       NA         NA
## 13:      8 22164004     rs1124   G   A  A      ADD    255       NA         NA
## 14:      8 22164004     rs1124   G   A  A   DOMDEV    255       NA         NA
## 15:      8 22164004     rs1124   G   A  A GENO_2DF    255       NA         NA
## 16:     10 79557289  rs1965708   C   A  A      ADD    390 0.905022   0.258953
## 17:     10 79557289  rs1965708   C   A  A   DOMDEV    390 0.928540   0.320549
## 18:     10 79557289  rs1965708   C   A  A GENO_2DF    390       NA         NA
## 19:     10 79557536  rs1965707   C   T  T      ADD    390 1.071450   0.194704
## 20:     10 79557536  rs1965707   C   T  T   DOMDEV    390 0.935901   0.258764
## 21:     10 79557536  rs1965707   C   T  T GENO_2DF    390       NA         NA
## 22:     10 79558907 rs17886395   G   C  C      ADD    390 0.748748   0.127965
## 23:     10 79558907 rs17886395   G   C  C   DOMDEV    390 1.214360   0.234634
## 24:     10 79558907 rs17886395   G   C  C GENO_2DF    390       NA         NA
## 25:     10 79559458  rs1059046   A   C  C      ADD    261 0.947906   0.199640
## 26:     10 79559458  rs1059046   A   C  C   DOMDEV    261 0.774079   0.277633
## 27:     10 79559458  rs1059046   A   C  C GENO_2DF    261       NA         NA
## 28:     10 79611881  rs1059047   T   C  C      ADD    263       NA         NA
## 29:     10 79611881  rs1059047   T   C  C   DOMDEV    263       NA         NA
## 30:     10 79611881  rs1059047   T   C  C GENO_2DF    263       NA         NA
## 31:     10 79611973  rs1136450   G   C  C      ADD    263 0.878743   0.189456
## 32:     10 79611973  rs1136450   G   C  C   DOMDEV    263 0.874953   0.264355
## 33:     10 79611973  rs1136450   G   C  C GENO_2DF    263       NA         NA
## 34:     10 79612325  rs1136451   A   G  G      ADD    392 1.456980   0.272894
## 35:     10 79612325  rs1136451   A   G  G   DOMDEV    392 0.759206   0.334302
## 36:     10 79612325  rs1136451   A   G  G GENO_2DF    392       NA         NA
## 37:     10 79613765  rs1059057   A   G  G      ADD    384 1.187250   0.619605
## 38:     10 79613765  rs1059057   A   G  G   DOMDEV    384 0.848591   0.666238
## 39:     10 79613765  rs1059057   A   G  G GENO_2DF    384       NA         NA
## 40:     10 79614021  rs4253527   C   T  T      ADD    391 1.198090   0.374249
## 41:     10 79614021  rs4253527   C   T  T   DOMDEV    391 1.268500   0.457329
## 42:     10 79614021  rs4253527   C   T  T GENO_2DF    391       NA         NA
## 43:     10 79941966  rs2243639   G   A  A      ADD    269 0.628845   0.204708
## 44:     10 79941966  rs2243639   G   A  A   DOMDEV    269 1.245520   0.282497
## 45:     10 79941966  rs2243639   G   A  A GENO_2DF    269       NA         NA
## 46:     10 79946568   rs721917   T   C  C      ADD    400 1.186520   0.155563
## 47:     10 79946568   rs721917   T   C  C   DOMDEV    400 1.165110   0.212786
## 48:     10 79946568   rs721917   T   C  C GENO_2DF    400       NA         NA
##     #CHROM      POS         ID REF ALT A1     TEST OBS_CT       OR LOG(OR)_SE
##     Z_OR_F_STAT         P
##  1:          NA        NA
##  2:          NA        NA
##  3:          NA        NA
##  4:  -1.6240800 0.1043580
##  5:  -0.8663360 0.3863060
##  6:   3.6117100 0.0297931
##  7:  -1.3403300 0.1801380
##  8:  -0.2219130 0.8243820
##  9:   1.1541900 0.3185730
## 10:          NA        NA
## 11:          NA        NA
## 12:          NA        NA
## 13:          NA        NA
## 14:          NA        NA
## 15:          NA        NA
## 16:  -0.3853830 0.6999540
## 17:  -0.2312960 0.8170850
## 18:   0.3349570 0.7155750
## 19:   0.3544730 0.7229850
## 20:  -0.2560090 0.7979440
## 21:   0.0652580 0.9368360
## 22:  -2.2611800 0.0237480
## 23:   0.8277450 0.4078150
## 24:   2.6924900 0.0689706
## 25:  -0.2679820 0.7887130
## 26:  -0.9223730 0.3563340
## 27:   0.5308460 0.5887410
## 28:          NA        NA
## 29:          NA        NA
## 30:          NA        NA
## 31:  -0.6822820 0.4950610
## 32:  -0.5053250 0.6133300
## 33:   0.4001510 0.6706260
## 34:   1.3791600 0.1678460
## 35:  -0.8240490 0.4099120
## 36:   0.9870450 0.3736010
## 37:   0.2770160 0.7817680
## 38:  -0.2464250 0.8053530
## 39:   0.0385039 0.9622320
## 40:   0.4829150 0.6291560
## 41:   0.5200530 0.6030270
## 42:   1.1125600 0.3297530
## 43:  -2.2660000 0.0234511
## 44:   0.7771890 0.4370470
## 45:   2.5674300 0.0786121
## 46:   1.0994000 0.2715940
## 47:   0.7181670 0.4726540
## 48:   1.1131300 0.3295430
##     Z_OR_F_STAT         P

In trying the dominant/recessive model I found that there were a lot more sparsity within the results with only a single significant SNP prior to p-value correction, rs3024798, with the additive models for rs2243639 & rs17886395 being significant. However when considering the p-value correction to P = 0.003125, no SNP is significant any longer.

Frequency Table

FrequencyTable <- as.data.frame(cbind("SNP ID"=Freq$SNP, 
                                      "Chromosome"=Freq$CHR,
                                      "Allele 1"=Freq$A1, 
                                      "Allele 1 Frequency"=(1-Freq$MAF), 
                                      "Allele 2"=Freq$A2, 
                                      "Allele 2 Frequency"=Freq$MAF))
FrequencyTable %>%
  kbl(caption = "Allele Frequencies of PossibleRSV Associated SNPs") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Allele Frequencies of PossibleRSV Associated SNPs
SNP ID Chromosome Allele 1 Allele 1 Frequency Allele 2 Allele 2 Frequency
rs1130866 2 C 0.5295 T 0.4705
rs3024798 2 A 0.6875 C 0.3125
rs2077079 2 C 0.5977 A 0.4023
rs4715 8 A 0.749 C 0.251
rs1124 8 A 0.6902 G 0.3098
rs1965708 10 A 0.8018 C 0.1982
rs1965707 10 T 0.7263 C 0.2737
rs17886395 10 C 0.601 G 0.399
rs1059046 10 C 0.5763 A 0.4237
rs1059047 10 C 0.90152 T 0.09848
rs1136450 10 C 0.553 G 0.447
rs1136451 10 G 0.8092 A 0.1908
rs1059057 10 G 0.9 A 0.1
rs4253527 10 T 0.8992 C 0.1008
rs2243639 10 A 0.6463 G 0.3537
rs721917 10 C 0.5723 T 0.4277
casefreq <- PEDFile[1:231,]
contfreq <- PEDFile[232:402,]
write.table(casefreq, file="/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/CaseFreq.ped", 
            sep="\t",
            quote=F,
            col.names=F, 
            row.names=F)
write.table(MAPFile, 
            "/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/CaseFreq.map", 
            sep="\t", 
            quote=F,
            col.names=F,
            row.names=F)
write.table(contfreq, file="/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/ControlFreq.ped", 
            sep="\t",
            quote=F,
            col.names=F, 
            row.names=F)
write.table(MAPFile, 
            "/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/ControlFreq.map", 
            sep="\t", 
            quote=F,
            col.names=F,
            row.names=F)

# plink --file CaseFreq --freq --mind 1 --nonfounders --out CaseFreq
# plink --file ControlFreq --freq --mind 1 --nonfounders --out ContFreq
CaFreq <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/CaseFreq.frq", sep=" ", header=T)
CoFreq <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/ContFreq.frq", sep=" ", header=T)

CaFrequencyTable <- as.data.frame(cbind("SNP ID"=CaFreq$SNP, 
                                        "Chromosome"=CaFreq$CHR, 
                                        "Allele 1"=CaFreq$A1, 
                                        "Allele 1 Frequency"=(1-CaFreq$MAF), 
                                        "Allele 2"=CaFreq$A2, 
                                        "Allele 2 Frequency"=CaFreq$MAF))

CoFrequencyTable <- as.data.frame(cbind("SNP ID"=CoFreq$SNP, 
                                        "Chromosome"=CoFreq$CHR, 
                                        "Allele 1"=CoFreq$A1, 
                                        "Allele 1 Frequency"=(1-CoFreq$MAF), 
                                        "Allele 2"=CoFreq$A2, 
                                        "Allele 2 Frequency"=CoFreq$MAF))

CaFrequencyTable %>%
  kbl(caption = "Allele Frequencies of Possible Case Associated SNPs") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Allele Frequencies of Possible Case Associated SNPs
SNP ID Chromosome Allele 1 Allele 1 Frequency Allele 2 Allele 2 Frequency
rs1130866 2 C 0.5333 T 0.4667
rs3024798 2 A 0.7593 C 0.2407
rs2077079 2 C 0.6389 A 0.3611
rs4715 8 A 0.7472 C 0.2528
rs1124 8 A 0.6878 G 0.3122
rs1965708 10 A 0.8111 C 0.1889
rs1965707 10 T 0.7222 C 0.2778
rs17886395 10 C 0.64 G 0.36
rs1059046 10 C 0.5819 A 0.4181
rs1059047 10 C 0.8929 T 0.1071
rs1136450 10 C 0.5625 G 0.4375
rs1136451 10 G 0.7928 A 0.2072
rs1059057 10 G 0.8973 A 0.1027
rs4253527 10 T 0.8851 C 0.1149
rs2243639 10 A 0.68 G 0.32
rs721917 10 C 0.5498 T 0.4502
CoFrequencyTable %>%
  kbl(caption = "Allele Frequencies of Possible Control Associated SNPs") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Allele Frequencies of Possible Control Associated SNPs
SNP ID Chromosome Allele 1 Allele 1 Frequency Allele 2 Allele 2 Frequency
rs1130866 2 C 0.5203 T 0.4797
rs3024798 2 A 0.6351 C 0.3649
rs2077079 2 C 0.5676 A 0.4324
rs4715 8 A 0.7533 C 0.2467
rs1124 8 A 0.6959 G 0.3041
rs1965708 10 A 0.7892 C 0.2108
rs1965707 10 T 0.7319 C 0.2681
rs17886395 10 C 0.5482 G 0.4518
rs1059046 10 C 0.5659 A 0.4341
rs1059047 10 C 0.91667 T 0.08333
rs1136450 10 C 0.5365 G 0.4635
rs1136451 10 G 0.8304 A 0.1696
rs1059057 10 G 0.90361 A 0.09639
rs4253527 10 T 0.91765 C 0.08235
rs2243639 10 A 0.5842 G 0.4158
rs721917 10 C 0.6029 T 0.3971

Permutations Testing

recode <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/RSVrecode.raw", sep=" ", header=T)
recodecov <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/RSVrecode.cov", sep=" ", header=T)
Rrs1130866 <- c()
Rrs3024798 <- c()
Rrs2077079 <- c()
Rrs4715 <- c()
Rrs1124 <- c()
Rrs1965708 <- c()
Rrs1965707 <- c()
Rrs17886395 <- c()
Rrs1059046 <- c()
Rrs1059047 <- c()
Rrs1136450 <- c()
Rrs1136451 <- c()
Rrs1059057 <- c()
Rrs4253527 <- c()
Rrs2243639 <- c()
Rrs721917 <- c()
recode <- dplyr::select(recode,-ends_with("HET"))
recode <- dplyr::select(recode,-c(PAT,MAT,SEX))
recode <- merge(recode, recodecov, by='FID')
recode$PHENOTYPE <- ifelse(recode$PHENOTYPE==2,1,0)
vec <- colnames(dplyr::select(recode, starts_with("rs")))
t <- 1000
N <- nrow(recode)
for (i in 1:t){
  recode$PHENOTYPE <- recode$PHENOTYPE[sample(nrow(recode), N, replace=F)]
  rs1130866 <- glm(
    PHENOTYPE ~ rs1130866_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs3024798 <- glm(
    PHENOTYPE ~ rs3024798_A + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs2077079 <- glm(
    PHENOTYPE ~ rs2077079_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs4715 <- glm(
    PHENOTYPE ~ rs4715_A + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1124 <- glm(
    PHENOTYPE ~ rs1124_A + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1965708 <- glm(
    PHENOTYPE ~ rs1965708_A + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1965707 <- glm(
    PHENOTYPE ~ rs1965707_T + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs17886395 <- glm(
    PHENOTYPE ~ rs17886395_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1059046 <- glm(
    PHENOTYPE ~ rs1059046_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1059047 <- glm(
    PHENOTYPE ~ rs1059047_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1136450 <- glm(
    PHENOTYPE ~ rs1136450_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1136451 <- glm(
    PHENOTYPE ~ rs1136451_G + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs1059057 <- glm(
    PHENOTYPE ~ rs1059057_G + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs4253527 <- glm(
    PHENOTYPE ~ rs4253527_T + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs2243639 <- glm(
    PHENOTYPE ~ rs2243639_A + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  rs721917 <- glm(
    PHENOTYPE ~ rs721917_C + Sex + Smoking + Pets + Infection + Ancestry_6 + Ancestry_3 + Ancestry_5 + Ancestry_4 + Ancestry_1 + Age, 
    data=recode, family="binomial")
  Rrs1130866 <- append(Rrs1130866, summary(rs1130866)$coefficients[2,4])
  Rrs3024798 <- append(Rrs3024798, summary(rs3024798)$coefficients[2,4])
  Rrs2077079 <- append(Rrs2077079, summary(rs2077079)$coefficients[2,4])
  Rrs4715 <- append(Rrs4715, summary(rs4715)$coefficients[2,4])
  Rrs1124 <- append(Rrs1124, summary(rs1124)$coefficients[2,4])
  Rrs1965708 <- append(Rrs1965708, summary(rs1965708)$coefficients[2,4])
  Rrs1965707 <- append(Rrs1965707, summary(rs1965707)$coefficients[2,4])
  Rrs17886395 <- append(Rrs17886395, summary(rs17886395)$coefficients[2,4])
  Rrs1059046 <- append(Rrs1059046, summary(rs1059046)$coefficients[2,4])
  Rrs1059047 <- append(Rrs1059047, summary(rs1059047)$coefficients[2,4])
  Rrs1136450 <- append(Rrs1136450, summary(rs1136450)$coefficients[2,4])
  Rrs1136451 <- append(Rrs1136451, summary(rs1136451)$coefficients[2,4])
  Rrs1059057 <- append(Rrs1059057, summary(rs1059057)$coefficients[2,4])
  Rrs4253527 <- append(Rrs4253527, summary(rs4253527)$coefficients[2,4])
  Rrs2243639 <- append(Rrs2243639, summary(rs2243639)$coefficients[2,4])
  Rrs721917 <- append(Rrs721917, summary(rs721917)$coefficients[2,4])
  Permutation_Results <- as.data.frame(cbind("rs1130866"=Rrs1130866,
                                             "rs3024798"=Rrs3024798,
                                             "rs2077079"=Rrs2077079,
                                             "rs4715"=Rrs4715,
                                             "rs1124"=Rrs1124,
                                             "rs1965708"=Rrs1965708,
                                             "rs1965707"=Rrs1965707,
                                             "rs17886395"=Rrs17886395,
                                             "rs1059046"=Rrs1059046,
                                             "rs1059047"=Rrs1059047,
                                             "rs1136450"=Rrs1136450,
                                             "rs1136451"=Rrs1136451,
                                             "rs1059057"=Rrs1059057,
                                             "rs4253527"=Rrs4253527,
                                             "rs2243639"=Rrs2243639,
                                             "rs721917"=Rrs721917))
}
distrib <- data.frame(rowMin(as.matrix(Permutation_Results)))
colnames(distrib) <- "P_Value"

Histograms for Each SNP w/ P-Value

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1130866
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1130866 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[1]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[1]), col="red")

#rs4715
hist(as.numeric(distrib$P_Value), xlab=paste0("rs4715 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[4]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[4]), col="red")

#rs1124
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1124 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[5]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[5]), col="red")

#rs1965708
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1965708 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[6]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[6]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1965707
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1965707 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[7]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[7]), col="red")

#rs17886395
hist(as.numeric(distrib$P_Value), xlab=paste0("rs17886395 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[8]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[8]), col="red")

#rs1059046
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059046 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[9]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[9]), col="red")

#rs1059047
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059047 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[10]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[10]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1136450
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1136450 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[11]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[11]), col="red")

#rs1136451
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1136451 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[12]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[12]), col="red")

#rs1059057
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059057 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[13]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[13]), col="red")

#rs4253527
hist(as.numeric(distrib$P_Value), xlab=paste0("rs4253527 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[14]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[14]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs3024798
hist(as.numeric(distrib$P_Value), xlab=paste0("rs3024798 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[2]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[2]), col="red")

#rs2077079
hist(as.numeric(distrib$P_Value), xlab=paste0("rs2077079 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[3]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[3]), col="red")

#rs2243639
hist(as.numeric(distrib$P_Value), xlab=paste0("rs2243639 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[15]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[15]), col="red")

#rs721917
hist(as.numeric(distrib$P_Value), xlab=paste0("rs721917 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < UpCovAssoc$P[16]))/nrow(distrib),")"), main=NULL)
abline(v=summary(UpCovAssoc$P[16]), col="red")

Permutations for Association 2 (Association without covariates)

recode <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/RSVrecode.raw", sep=" ", header=T)
recodecov <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/RSVrecode.cov", sep=" ", header=T)
Rrs1130866 <- c()
Rrs3024798 <- c()
Rrs2077079 <- c()
Rrs4715 <- c()
Rrs1124 <- c()
Rrs1965708 <- c()
Rrs1965707 <- c()
Rrs17886395 <- c()
Rrs1059046 <- c()
Rrs1059047 <- c()
Rrs1136450 <- c()
Rrs1136451 <- c()
Rrs1059057 <- c()
Rrs4253527 <- c()
Rrs2243639 <- c()
Rrs721917 <- c()
recode <- dplyr::select(recode,-ends_with("HET"))
recode <- dplyr::select(recode,-c(PAT,MAT,SEX))
recode <- merge(recode, recodecov, by='FID')
recode$PHENOTYPE <- ifelse(recode$PHENOTYPE==2,1,0)
vec <- colnames(dplyr::select(recode, starts_with("rs")))
t <- 1000
N <- nrow(recode)
for (i in 1:t){
  recode$PHENOTYPE <- recode$PHENOTYPE[sample(nrow(recode), N, replace=F)]
  rs1130866 <- glm(PHENOTYPE ~ rs1130866_C, data=recode, family="binomial")
  rs3024798 <- glm(PHENOTYPE ~ rs3024798_A, data=recode, family="binomial")
  rs2077079 <- glm(PHENOTYPE ~ rs2077079_C, data=recode, family="binomial")
  rs4715 <- glm(PHENOTYPE ~ rs4715_A, data=recode, family="binomial")
  rs1124 <- glm(PHENOTYPE ~ rs1124_A, data=recode, family="binomial")
  rs1965708 <- glm(PHENOTYPE ~ rs1965708_A, data=recode, family="binomial")
  rs1965707 <- glm(PHENOTYPE ~ rs1965707_T, data=recode, family="binomial")
  rs17886395 <- glm(PHENOTYPE ~ rs17886395_C, data=recode, family="binomial")
  rs1059046 <- glm(PHENOTYPE ~ rs1059046_C, data=recode, family="binomial")
  rs1059047 <- glm(PHENOTYPE ~ rs1059047_C, data=recode, family="binomial")
  rs1136450 <- glm(PHENOTYPE ~ rs1136450_C, data=recode, family="binomial")
  rs1136451 <- glm(PHENOTYPE ~ rs1136451_G, data=recode, family="binomial")
  rs1059057 <- glm(PHENOTYPE ~ rs1059057_G, data=recode, family="binomial")
  rs4253527 <- glm(PHENOTYPE ~ rs4253527_T, data=recode, family="binomial")
  rs2243639 <- glm(PHENOTYPE ~ rs2243639_A, data=recode, family="binomial")
  rs721917 <- glm(PHENOTYPE ~ rs721917_C, data=recode, family="binomial")
  Rrs1130866 <- append(Rrs1130866, summary(rs1130866)$coefficients[2,4])
  Rrs3024798 <- append(Rrs3024798, summary(rs3024798)$coefficients[2,4])
  Rrs2077079 <- append(Rrs2077079, summary(rs2077079)$coefficients[2,4])
  Rrs4715 <- append(Rrs4715, summary(rs4715)$coefficients[2,4])
  Rrs1124 <- append(Rrs1124, summary(rs1124)$coefficients[2,4])
  Rrs1965708 <- append(Rrs1965708, summary(rs1965708)$coefficients[2,4])
  Rrs1965707 <- append(Rrs1965707, summary(rs1965707)$coefficients[2,4])
  Rrs17886395 <- append(Rrs17886395, summary(rs17886395)$coefficients[2,4])
  Rrs1059046 <- append(Rrs1059046, summary(rs1059046)$coefficients[2,4])
  Rrs1059047 <- append(Rrs1059047, summary(rs1059047)$coefficients[2,4])
  Rrs1136450 <- append(Rrs1136450, summary(rs1136450)$coefficients[2,4])
  Rrs1136451 <- append(Rrs1136451, summary(rs1136451)$coefficients[2,4])
  Rrs1059057 <- append(Rrs1059057, summary(rs1059057)$coefficients[2,4])
  Rrs4253527 <- append(Rrs4253527, summary(rs4253527)$coefficients[2,4])
  Rrs2243639 <- append(Rrs2243639, summary(rs2243639)$coefficients[2,4])
  Rrs721917 <- append(Rrs721917, summary(rs721917)$coefficients[2,4])
  Permutation_Results <- as.data.frame(cbind("rs1130866"=Rrs1130866,
                                             "rs3024798"=Rrs3024798,
                                             "rs2077079"=Rrs2077079,
                                             "rs4715"=Rrs4715,
                                             "rs1124"=Rrs1124,
                                             "rs1965708"=Rrs1965708,
                                             "rs1965707"=Rrs1965707,
                                             "rs17886395"=Rrs17886395,
                                             "rs1059046"=Rrs1059046,
                                             "rs1059047"=Rrs1059047,
                                             "rs1136450"=Rrs1136450,
                                             "rs1136451"=Rrs1136451,
                                             "rs1059057"=Rrs1059057,
                                             "rs4253527"=Rrs4253527,
                                             "rs2243639"=Rrs2243639,
                                             "rs721917"=Rrs721917))
}
distrib <- data.frame(rowMin(as.matrix(Permutation_Results)))
colnames(distrib) <- "P_Value"
par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1130866
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1130866 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[1]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[1]), col="red")

#rs4715
hist(as.numeric(distrib$P_Value), xlab=paste0("rs4715 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[4]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[4]), col="red")

#rs1124
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1124 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[5]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[5]), col="red")

#rs1965708
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1965708 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[6]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[6]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1965707
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1965707 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[7]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[7]), col="red")

#rs17886395
hist(as.numeric(distrib$P_Value), xlab=paste0("rs17886395 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[8]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[8]), col="red")

#rs1059046
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059046 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[9]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[9]), col="red")

#rs1059047
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059047 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[10]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[10]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1136450
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1136450 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[11]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[11]), col="red")

#rs1136451
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1136451 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[12]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[12]), col="red")

#rs1059057
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059057 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[13]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[13]), col="red")

#rs4253527
hist(as.numeric(distrib$P_Value), xlab=paste0("rs4253527 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[14]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[14]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs3024798
hist(as.numeric(distrib$P_Value), xlab=paste0("rs3024798 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[2]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[2]), col="red")

#rs2077079
hist(as.numeric(distrib$P_Value), xlab=paste0("rs2077079 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[3]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[3]), col="red")

#rs2243639
hist(as.numeric(distrib$P_Value), xlab=paste0("rs2243639 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[15]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[15]), col="red")

#rs721917
hist(as.numeric(distrib$P_Value), xlab=paste0("rs721917 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < Assoc2$P[16]))/nrow(distrib),")"), main=NULL)
abline(v=summary(Assoc2$P[16]), col="red")

Permutatinos for LR Model without Covariates (NoCov)

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1130866
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1130866 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[1]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[1]), col="red")

#rs4715
hist(as.numeric(distrib$P_Value), xlab=paste0("rs4715 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[4]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[4]), col="red")

#rs1124
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1124 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[5]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[5]), col="red")

#rs1965708
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1965708 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[6]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[6]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1965707
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1965707 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[7]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[7]), col="red")

#rs17886395
hist(as.numeric(distrib$P_Value), xlab=paste0("rs17886395 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[8]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[8]), col="red")

#rs1059046
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059046 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[9]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[9]), col="red")

#rs1059047
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059047 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[10]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[10]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs1136450
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1136450 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[11]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[11]), col="red")

#rs1136451
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1136451 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[12]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[12]), col="red")

#rs1059057
hist(as.numeric(distrib$P_Value), xlab=paste0("rs1059057 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[13]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[13]), col="red")

#rs4253527
hist(as.numeric(distrib$P_Value), xlab=paste0("rs4253527 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[14]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[14]), col="red")

par(mfrow=c(2,2), oma=c(0, 0, 2, 0))
#rs3024798
hist(as.numeric(distrib$P_Value), xlab=paste0("rs3024798 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[2]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[2]), col="red")

#rs2077079
hist(as.numeric(distrib$P_Value), xlab=paste0("rs2077079 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[3]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[3]), col="red")

#rs2243639
hist(as.numeric(distrib$P_Value), xlab=paste0("rs2243639 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[15]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[15]), col="red")

#rs721917
hist(as.numeric(distrib$P_Value), xlab=paste0("rs721917 Min P Estimates (P-Value: ",nrow(filter(distrib, distrib$P_Value < NoCov$P[16]))/nrow(distrib),")"), main=NULL)
abline(v=summary(NoCov$P[16]), col="red")

Full SNP Model with Covariates

recode2 <- recode[,3:30]
recode2 <- subset(recode2, select=-c(IID.y))
write.table(recode2, "/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/RecodedSNPs.txt", quote=F, sep="\t", row.names=F, col.names=T)
FullSNP <- fread("/Users/keenananderson-fears/Desktop/Dr._Liu_Lab/Consultations/Gandhi/RecodedSNPs2.txt", sep="\t", header=T)
fullSNPmodel <- glm(PHENOTYPE ~ ., data=FullSNP, family=binomial)
summary(fullSNPmodel)
## 
## Call:
## glm(formula = PHENOTYPE ~ ., family = binomial, data = FullSNP)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1650  -1.1503   0.7798   1.0042   2.0117  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   1.111e+00  6.866e-01   1.618  0.10557   
## V1           -3.323e-04  9.457e-04  -0.351  0.72529   
## rs1130866_C   1.699e-02  1.985e-01   0.086  0.93179   
## rs3024798_A  -1.013e+00  5.297e-01  -1.912  0.05583 . 
## rs2077079_C   5.046e-01  4.996e-01   1.010  0.31252   
## rs4715_A      1.717e-02  3.374e-01   0.051  0.95942   
## rs1124_A     -4.358e-02  3.270e-01  -0.133  0.89397   
## rs1965708_A  -5.409e-01  4.096e-01  -1.321  0.18665   
## rs1965707_T   3.951e-01  3.608e-01   1.095  0.27344   
## rs17886395_C -4.705e-01  1.549e-01  -3.038  0.00238 **
## rs1059046_C   3.724e-02  3.543e-01   0.105  0.91628   
## rs1059047_C   1.322e+00  6.173e-01   2.142  0.03222 * 
## rs1136450_C  -3.916e-01  3.242e-01  -1.208  0.22704   
## rs1136451_G  -9.724e-02  5.615e-01  -0.173  0.86250   
## rs1059057_G  -5.101e-01  7.099e-01  -0.719  0.47244   
## rs4253527_T   3.403e-01  5.184e-01   0.656  0.51154   
## rs2243639_A  -4.522e-01  2.261e-01  -2.000  0.04545 * 
## rs721917_C    1.516e-01  1.910e-01   0.794  0.42724   
## Sex           2.299e-01  2.178e-01   1.056  0.29103   
## Smoking      -1.011e-01  3.222e-01  -0.314  0.75365   
## Pets          6.799e-02  3.311e-01   0.205  0.83730   
## Infection    -1.643e-01  2.242e-01  -0.733  0.46356   
## Ancestry_6   -1.788e-02  3.824e-01  -0.047  0.96271   
## Ancestry_3    7.565e-02  7.085e-01   0.107  0.91496   
## Ancestry_5   -2.366e-01  7.960e-01  -0.297  0.76628   
## Ancestry_4   -1.437e+01  5.354e+02  -0.027  0.97860   
## Ancestry_1   -1.507e+00  1.220e+00  -1.235  0.21685   
## Age          -7.812e-03  2.029e-02  -0.385  0.70020   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 548.30  on 401  degrees of freedom
## Residual deviance: 513.25  on 374  degrees of freedom
## AIC: 569.25
## 
## Number of Fisher Scoring iterations: 12

In the full SNP model which performs a logistic regression on all non-multi-allelic SNPs three SNPs are found to be significant, specifically rs17886395 (0.00238), rs1059047 (0.03222) & rs2243639 (0.04545).

Full SNP Model w/o Covariates

FullSNP2 <- FullSNP[,1:18]
fullSNPmodel2 <- glm(PHENOTYPE ~ ., data=FullSNP2, family="binomial")
summary(fullSNPmodel2)
## 
## Call:
## glm(formula = PHENOTYPE ~ ., family = "binomial", data = FullSNP2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1703  -1.1845   0.7749   0.9971   1.6724  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)   1.2443551  0.5162157   2.411   0.0159 * 
## V1           -0.0001606  0.0009266  -0.173   0.8624   
## rs1130866_C  -0.0100724  0.1959957  -0.051   0.9590   
## rs3024798_A  -1.0105575  0.5206080  -1.941   0.0522 . 
## rs2077079_C   0.5060630  0.4871903   1.039   0.2989   
## rs4715_A      0.0339179  0.3321206   0.102   0.9187   
## rs1124_A     -0.0453215  0.3213087  -0.141   0.8878   
## rs1965708_A  -0.5480128  0.4054379  -1.352   0.1765   
## rs1965707_T   0.4094321  0.3584099   1.142   0.2533   
## rs17886395_C -0.4721751  0.1528249  -3.090   0.0020 **
## rs1059046_C   0.0516911  0.3514990   0.147   0.8831   
## rs1059047_C   1.2753842  0.6108299   2.088   0.0368 * 
## rs1136450_C  -0.3737685  0.3200567  -1.168   0.2429   
## rs1136451_G  -0.1440679  0.5530747  -0.260   0.7945   
## rs1059057_G  -0.4163238  0.6991972  -0.595   0.5516   
## rs4253527_T   0.3598993  0.5130949   0.701   0.4830   
## rs2243639_A  -0.4567987  0.2241474  -2.038   0.0416 * 
## rs721917_C    0.1773194  0.1885183   0.941   0.3469   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 548.30  on 401  degrees of freedom
## Residual deviance: 519.56  on 384  degrees of freedom
## AIC: 555.56
## 
## Number of Fisher Scoring iterations: 4

In the second full SNP model we simply removed the covariates and ran the logistic regression, with rs17886395 (0.0020), rs1059047 (0.0368) & rs2243639 (0.0416) being found to be significant again with similar p-values as in the original model with covariates.

SNP1odds <- exp(fullSNPmodel$coefficients)
SNP1Data <- as.data.frame(cbind("Name"=matrix(names(fullSNPmodel$coefficients), ncol=1),
                                "OR"=na.omit(unname(SNP1odds)),
                                "P"=matrix(coef(summary(fullSNPmodel))[,4], ncol=1),
                                "2.5%"=matrix(exp(na.omit(confint.default(fullSNPmodel, level=0.95)))[,1]),
                                "97.5%"=matrix(exp(na.omit(confint.default(fullSNPmodel, level=0.95)))[,2])))
SNP1Data = SNP1Data[-c(1,2,19,20,21,22,23,24,25,26,27,28),]
colnames(SNP1Data)[colnames(SNP1Data) == "V1"] <- "Variant"
colnames(SNP1Data)[colnames(SNP1Data) == "V2"] <- "Beta"
colnames(SNP1Data)[colnames(SNP1Data) == "V3"] <- "P"
colnames(SNP1Data)[colnames(SNP1Data) == "V4"] <- "2.5%"
colnames(SNP1Data)[colnames(SNP1Data) == "V5"] <- "97.5%"
rownames(SNP1Data) <- NULL
SNP2odds <- exp(fullSNPmodel2$coefficients)
SNP2Data <- as.data.frame(cbind("Name"=matrix(names(fullSNPmodel2$coefficients), ncol=1),
                                "OR"=na.omit(unname(SNP2odds)),
                                "P"=matrix(coef(summary(fullSNPmodel2))[,4], ncol=1),
                                "2.5%"=matrix(exp(na.omit(confint.default(fullSNPmodel2, level=0.95)))[,1]),
                                "97.5%"=matrix(exp(na.omit(confint.default(fullSNPmodel2, level=0.95)))[,2])))
SNP2Data = SNP2Data[-c(1,2),]
colnames(SNP2Data)[colnames(SNP2Data) == "V1"] <- "Variant"
colnames(SNP2Data)[colnames(SNP2Data) == "V2"] <- "Beta"
colnames(SNP2Data)[colnames(SNP2Data) == "V3"] <- "P"
colnames(SNP2Data)[colnames(SNP2Data) == "V4"] <- "2.5%"
colnames(SNP2Data)[colnames(SNP2Data) == "V5"] <- "97.5%"
rownames(SNP2Data) <- NULL
SNP1Data %>%
  kbl(caption = "Odds Ratios & CIs of Non-Multiallelic SNPs with Covariates") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Odds Ratios & CIs of Non-Multiallelic SNPs with Covariates
Variant OR P 2.5% 97.5%
rs1130866_C 1.01713137380399 0.93179155114509 0.689364039980758 1.50074006123857
rs3024798_A 0.363159983221053 0.0558349614912301 0.128598683540082 1.02555617042541
rs2077079_C 1.65627854574515 0.312516484467051 0.622119244840757 4.40953827396524
rs4715_A 1.0173142421815 0.959419214169963 0.525155184987143 1.97070941491447
rs1124_A 0.957356091574371 0.893974786952101 0.504359614143277 1.81721664537199
rs1965708_A 0.58225176416397 0.186650485381822 0.260908600108923 1.29937118489204
rs1965707_T 1.48456342243797 0.273435569918901 0.731985305690184 3.01089180084381
rs17886395_C 0.624689514776487 0.00238264742216896 0.461138114190819 0.846247529455357
rs1059046_C 1.03794281125211 0.916276369990059 0.518364104439841 2.0783176732388
rs1059047_C 3.75110240747731 0.0322163391738038 1.11872860611497 12.5774644489032
rs1136450_C 0.675958522895938 0.227041975841574 0.358072296015071 1.27605494689382
rs1136451_G 0.907335104655631 0.862504976224902 0.301871019448445 2.72718127644262
rs1059057_G 0.600448390204921 0.4724419413366 0.14934789968379 2.41408329185102
rs4253527_T 1.40535329458134 0.511540625011172 0.508786711288969 3.88181892091303
rs2243639_A 0.63619550471594 0.0454498497726392 0.408467831006509 0.990885179925761
rs721917_C 1.16370342230608 0.42723896481475 0.80038203451961 1.69194909016129
SNP2Data %>%
  kbl(caption = "Odds Ratios & CIs of Non-Multiallelic SNPs without Covariates") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Odds Ratios & CIs of Non-Multiallelic SNPs without Covariates
Variant OR P 2.5% 97.5%
rs1130866_C 0.989978133172503 0.959013928612707 0.67420772881068 1.45364204870294
rs3024798_A 0.364015993228228 0.0522449059258775 0.131213389480167 1.00986373304503
rs2077079_C 1.65874778631325 0.298926736870718 0.638385762856372 4.3100024760705
rs4715_A 1.03449970540474 0.918657189623131 0.539546354148601 1.98349897511817
rs1124_A 0.955690158670641 0.887828155473869 0.509118250796679 1.79397159294662
rs1965708_A 0.578097455162327 0.176485173522799 0.261151007818047 1.27970659756368
rs1965707_T 1.50596226549719 0.25330558107754 0.745993542089207 3.04013669977618
rs17886395_C 0.623644299123989 0.00200393917080188 0.462223659975525 0.841437264051878
rs1059046_C 1.05305042339325 0.883085395263228 0.52875278689298 2.09722808408225
rs1059047_C 3.58007656832181 0.0368020558030007 1.08131688979259 11.8530916848116
rs1136450_C 0.688136174363845 0.242879456595185 0.367486717988902 1.28856737206597
rs1136451_G 0.865828966282129 0.794489369138601 0.292855965801092 2.55982423578951
rs1059057_G 0.659466689231917 0.551555288078532 0.167507834352524 2.59627447210174
rs4253527_T 1.43318501780874 0.483035768181844 0.524270140977851 3.9178643503144
rs2243639_A 0.633307789893682 0.0415560637378452 0.408150362213138 0.982674018871935
rs721917_C 1.1940124486883 0.346912270507755 0.825166866510544 1.72773021492183

The odds ratios for the SNP model with covariates are, rs17886395 (0.6246), rs1059047 (3.751), and rs2243639 (0.6361). Odds ratios for the SNP model without covariates are rs17886395 (0.6236), rs1059047 (3.580), and rs2243639 (0.6333).